I'm going to overwrite a lot of this notebook's old content. I changed the way I'm calculating wt, and I want to test that the training worked.
In [1]:
from pearce.emulator import OriginalRecipe, ExtraCrispy, SpicyBuffalo
from pearce.mocks import cat_dict
import numpy as np
from os import path
In [2]:
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
In [3]:
training_file = '/scratch/users/swmclau2/xi_gm_cosmo/PearceRedMagicXiGMCosmoFixedNd.hdf5'
#training_file = '/u/ki/swmclau2/des/PearceRedMagicXiCosmoFixedNdLowMsat.hdf5'
test_file = '/scratch/users/swmclau2/xi_gm_cosmo_test/PearceRedMagicXiGMCosmoFixedNdTest.hdf5'
#test_file = '/u/ki/swmclau2/des/PearceRedMagicXiCosmoFixedNdLowMsatTest.hdf5'
em_method = 'gp'
split_method = 'random'
In [4]:
a = 1.0
z = 1.0/a - 1.0
In [5]:
fixed_params = {'z':z}#, 'cosmo': 0}#, 'r':24.06822623}
In [6]:
v = np.loadtxt('/home/users/swmclau2/Git/pearce/bin/optimization/sloppy_joes_result_xigm.npy')
In [7]:
param_names = ['ombh2', 'omch2', 'w0', 'ns', 'ln10As', 'H0', 'Neff', 'logM0', 'sigma_logM', 'logM1', 'alpha']
In [8]:
pnames = ['bias', 'amp']
pnames.extend(param_names)
pnames.append('amp')
pnames.extend(param_names)
In [9]:
from collections import defaultdict
metric = defaultdict(list)
for val, pname in zip(v, pnames):
    metric[pname].append(val)
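Quick sanity check I'm adding here (not part of the original flow): given how pnames was built, each parameter should have picked up two metric values (one per kernel) and 'bias' just one, assuming v has the matching length.
In [ ]:
# Hedged sanity check: pnames = ['bias', 'amp', <11 params>, 'amp', <11 params>],
# so every parameter should map to two metric entries and 'bias' to one.
for pname in sorted(metric):
    print('%s: %d' % (pname, len(metric[pname])))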
In [10]:
np.random.seed(0)
emu = SpicyBuffalo(training_file, method=em_method, hyperparams={'metric': metric},
                   fixed_params=fixed_params, custom_mean_function='linear',
                   downsample_factor=0.1)
In [11]:
emu.scale_bin_centers
Out[11]:
In [12]:
rbins = np.logspace(-1.1, 1.6, 19)
print (rbins[1:] + rbins[:-1])/2  # arithmetic bin centers
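An aside I'm adding: for log-spaced bins the geometric mean is arguably the more natural center, so here's the log-space version to compare against emu.scale_bin_centers.
In [ ]:
# Hedged aside: geometric (log-space) bin centers, for comparison with
# the arithmetic centers printed above and with emu.scale_bin_centers.
print(np.sqrt(rbins[1:]*rbins[:-1]))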
In [13]:
params = {}
for pname in emu.get_param_names():
    if pname == 'r':
        continue
    low, high = emu.get_param_bounds(pname)
    params[pname] = np.random.uniform(low, high)
print params
In [14]:
pred_y = emu.emulate_wrt_r(params)[0]
print pred_y
In [15]:
plt.plot(emu.scale_bin_centers, pred_y)
plt.xscale('log')
In [16]:
gof = emu.goodness_of_fit(test_file, statistic = 'frac')
#print gof.mean(), np.median(gof)
for g in gof:
    print g.mean(), np.median(g)
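To see where the emulator struggles, here's a quick sketch (mine, assuming gof is a list of per-bin arrays, as the loop above implies) of the fractional error as a function of scale.
In [ ]:
# Hedged sketch: mean and median fractional error per radial bin.
plt.plot(emu.scale_bin_centers, [g.mean() for g in gof], label='mean')
plt.plot(emu.scale_bin_centers, [np.median(g) for g in gof], label='median')
plt.xscale('log')
plt.legend(loc='best')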
In [17]:
pred_y, data_y = emu.goodness_of_fit(test_file, statistic = None)
In [18]:
test_x, _, _, _ = emu.get_data(test_file, fixed_params=fixed_params)
train_x, _, _, _ = emu.get_data(training_file, fixed_params=fixed_params)
emu.get_param_names()
Out[18]:
In [19]:
print test_x.shape, train_x.shape
print test_x.shape[0]/(7*emu.n_bins), train_x.shape[0]/(40*emu.n_bins)
In [20]:
idx1, idx2 = -5, -2
plt.scatter(test_x[0:1000*emu.n_bins:emu.n_bins, idx1], test_x[0:1000*emu.n_bins:emu.n_bins, idx2], alpha = 0.5)
plt.scatter(train_x[0:1000*emu.n_bins:emu.n_bins, idx1], train_x[0:1000*emu.n_bins:emu.n_bins, idx2], color = 'g', alpha = 0.5)
Out[20]:
In [21]:
resmat = np.zeros( (len(pred_y), pred_y[0].shape[0]))
for bin_no in xrange(len(pred_y)):
    py, dy = pred_y[bin_no], data_y[bin_no]
    resmat[bin_no] = 10**py - 10**dy
    #print np.sqrt(np.mean( (py - dy)**2)*py.shape[0]/(py.shape[0]-1) )
In [22]:
scovmat = resmat.dot(resmat.T)/(len(pred_y[0])-1)
In [23]:
print np.sqrt(np.diag(scovmat))
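The diagonal is the per-bin scatter; the off-diagonal structure is easier to read as a correlation matrix. A quick sketch of mine, derived from scovmat above:
In [ ]:
# Hedged sketch: normalize the sample covariance into a correlation matrix.
d = np.sqrt(np.diag(scovmat))
corrmat = scovmat/np.outer(d, d)
plt.imshow(corrmat, cmap='RdBu_r', vmin=-1, vmax=1)
plt.colorbar()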
In [52]:
%%bash
cp /home/users/swmclau2/.local/lib/python2.7/site-packages/pearce/emulator/default_metrics.pkl /home/users/swmclau2/Git/pearce/emulator/default_metrics.pkl
In [53]:
#print g.mean(), np.median(g)
In [54]:
plt.hist(data_y)
In [ ]:
plt.hist(pred_y)
In [ ]:
plt.hist(emu.y)
In [ ]:
plt.hist(np.log10(gof));
In [ ]:
plt.hist(emu.y)
In [ ]:
for i in xrange(50):
    params = {}
    for pname in emu.get_param_names():
        if pname == 'r':
            continue
        low, high = emu.get_param_bounds(pname)
        params[pname] = np.random.uniform(low, high)
    pred_y = emu.emulate(params)[0]
    print pred_y
    #print params
In [ ]:
for i, (g, r) in enumerate(zip(gof, emu.scale_bin_centers)):
    print r, g.mean(), np.median(g)
    #plt.hist(np.log10(g))
    #plt.show()
In [ ]:
n_cosmo_params = 7
loo_cosmo = emu.x[0, 0, :n_cosmo_params]
loo_cosmo_idxs = np.all(emu.x[:, :, :n_cosmo_params] == loo_cosmo, axis=2)
train_x, train_y, train_yerr = emu.x[~loo_cosmo_idxs, :], emu.y[~loo_cosmo_idxs], emu.yerr[~loo_cosmo_idxs]
test_x, test_y, test_yerr = emu.x[loo_cosmo_idxs, :], emu.y[loo_cosmo_idxs], emu.yerr[loo_cosmo_idxs]
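Adding a quick shape check on the leave-one-cosmology-out split (mine; it assumes the boolean mask partitions emu.y cleanly).
In [ ]:
# Hedged sanity check: the held-out cosmology plus the training remainder
# should account for every point in emu.y.
print(train_x.shape)
print(test_x.shape)
print(train_y.size + test_y.size == emu.y.size)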
In [ ]:
model = emu._emulator
model.compute(train_x, train_yerr)
In [ ]:
pred_y = model.predict(train_y, test_x, False, False, False)*emu._y_std + emu._y_mean
In [ ]:
np.mean(np.abs((pred_y-test_y)/test_y))
#np.mean(np.abs((pred_y-train_y)/train_y))
In [ ]:
# NOTE: 'ypred' here presumably holds predictions for the full emu.x from an
# earlier (since-deleted) cell; pred_y above covers only the held-out cosmology.
resids = np.abs(emu.y*emu._y_std + emu._y_mean - ypred)
In [ ]:
np.mean(resids/(emu.y*emu._y_std+emu._y_mean))
In [ ]:
ypred.mean(), emu._y_mean
In [ ]:
test_gof = emu.goodness_of_fit(test_file, statistic = 'log_frac')
print test_gof.mean()
In [ ]:
test_gof = emu.goodness_of_fit(test_file, statistic = 'frac')
print test_gof.mean()
In [ ]:
plt.hist(np.log10(test_gof));
In [ ]:
test_x
In [ ]:
(emu.x*emu._x_std) + emu._x_mean
In [ ]:
emu.get_param_names()
In [ ]:
test_x_white, test_y_white = (test_x - emu._x_mean)/(emu._x_std + 1e-5), (test_y - emu._y_mean)/(emu._y_std + 1e-5)
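Round-trip check I'm adding on the whitening; because of the 1e-5 floor in the denominator, the inverse only recovers test_x up to that regularization.
In [ ]:
# Hedged check: un-whitening should recover the original test points
# up to the 1e-5 floor in the denominator.
x_back = test_x_white*(emu._x_std + 1e-5) + emu._x_mean
print(np.abs(x_back - test_x).max())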
In [ ]:
model = emu._emulator
In [ ]:
pred_y_white = model.predict(emu.y, test_x_white, False, False, False)
In [ ]:
pred_y = pred_y_white*emu._y_std + emu._y_mean
In [ ]:
plt.plot(pred_y[:100], label = 'pred')
plt.plot(test_y[:100], label = 'truth')
plt.legend(loc = 'best')
In [ ]:
test_y.mean(), emu._y_mean, pred_y.mean()
In [ ]:
test_y.std(), emu._y_std, pred_y.std()
In [ ]:
plt.hist(pred_y_white, bins = np.linspace(-3, 3, 100), label = 'Pred')
plt.hist(test_y_white, bins = np.linspace(-3, 3, 100), label = 'Test', alpha = 0.4);
plt.legend(loc = 'best')